options(repos = c(CRAN = "https://cran.r-project.org"))
if (!requireNamespace("groundhog", quietly = TRUE)) {
install.packages("groundhog")
library("groundhog")
}
pkgs <- c("magrittr", "data.table", "stringr", "lubridate", "knitr", "glue",
"sandwich", "lmtest",
"ggplot2", "ggpubr", "rstatix", "patchwork")
groundhog::groundhog.library(pkg = pkgs,
date = "2023-09-25")3 Figures
This document explains how to reproduce the figures presented in the paper.
3.1 Install Packages
We install the following packages using the groundhog package manager to increase computational reproducibility.
3.2 Read Data
# data <- data.table::fread(file = "../data/processed/full.csv")
data <- readRDS(file="../data/processed/full.Rda")data[, communication := as.factor(communication)]
data[, communication := factor(communication, levels = c("point", "interval", "both"))]
data[, stage := as.factor(stage)]
data[, stage := factor(stage, levels = c("1", "2"))]3.3 Figure 1
To create Figure 3.1 (and fig-2), we create a wrapper function that we can call several times. As all the other figures presented in this document, Figure 3.1 consists of three panels, top, left, and right that are relatively similar. We thus, save both space and sources of error by creating a wrapper function plot_bars() that creates bar plots and annotates them with test statistics.
plot_bars <- function(response = "b", surprise_sub = NA, limits = ylim(-1.02, 1.02)){
if(response == "b"){
y_1 = 0.6
y_2 = 0.4
} else {
y_1 = 1.4
y_2 = 1
}
if(!is.na(surprise_sub)){
# Plot bottom panels
tmp <- data[surprise == surprise_sub]
names(tmp)[names(tmp) == response] <- 'outcome'
if(surprise_sub){
title <- "Surprising Condition"
} else {
title <- "Confirming Condition"
}
test_stats_1 <- tmp %>%
group_by(communication) %>%
wilcox_test(formula = outcome ~ stage,
paired = T) %>%
adjust_pvalue(method = "none") %>%
add_significance(p.col = "p.adj",
cutpoints = c(0, 0.01, 0.05, 0.1, 1),
symbols = c( "***", "**", "*", "ns")) %>%
as.data.table()
test_stats_2 <- tmp %>%
group_by(stage) %>%
wilcox_test(formula = outcome ~ communication) %>%
adjust_pvalue(method = "none") %>%
add_significance(p.col = "p.adj",
cutpoints = c(0, 0.01, 0.05, 0.1, 1),
symbols = c( "***", "**", "*", "ns")) %>%
as.data.table()
test_stats_2 <- test_stats_2[stage == 2]
plot_bottom <- ggplot(data = tmp,
mapping = aes(x = as.factor(communication),
y = outcome)) +
geom_bar(aes(fill = stage),
position = "dodge",
stat = "summary",
fun = "mean") +
limits +
scale_fill_manual(values=c("black", "gray")) +
theme_classic() +
stat_pvalue_manual(data = test_stats_2,
label = "{p} ({p.adj.signif})",
step.group.by = "stage",
tip.length = 0,
step.increase = 0.1,
y.position = y_1) +
stat_pvalue_manual(data = test_stats_1,
label = "{p} ({p.adj.signif})",
y.position = y_2,
tip.length = 0,
x = "communication") +
labs(title = title,
x = "Communication",
y = glue("Ambiguity index {response} (Baillon)"))
rm(tmp)
plot_bottom
} else {
# Plot the top panel
tmp <- data
names(tmp)[names(tmp) == response] <- 'outcome'
title <- "Both Conditions"
test_stats_1 <- tmp %>%
group_by(surprise) %>%
wilcox_test(formula = outcome ~ stage,
paired = T) %>%
adjust_pvalue(method = "none") %>%
add_significance(p.col = "p.adj",
cutpoints = c(0, 0.01, 0.05, 0.1, 1),
symbols = c( "***", "**", "*", "ns")) %>%
as.data.table()
test_stats_2 <- tmp %>%
group_by(stage) %>%
wilcox_test(formula = outcome ~ surprise) %>%
adjust_pvalue(method = "none") %>%
add_significance(p.col = "p.adj",
cutpoints = c(0, 0.01, 0.05, 0.1, 1),
symbols = c( "***", "**", "*", "ns")) %>%
as.data.table()
test_stats_2 <- test_stats_2[stage == 2]
plot_top <- ggplot(data = tmp,
mapping = aes(x = as.factor(surprise),
y = outcome)) +
geom_bar(aes(fill = stage),
position = "dodge",
stat = "summary",
fun = "mean") +
limits +
scale_fill_manual(values=c("black", "gray")) +
theme_classic() +
stat_pvalue_manual(data = test_stats_2,
label = "{p} ({p.adj.signif})",
step.group.by = "stage",
tip.length = 0,
step.increase = 0.1,
y.position = y_1) +
stat_pvalue_manual(data = test_stats_1,
label = "{p} ({p.adj.signif})",
y.position = y_2,
tip.length = 0,
x = "surprise") +
labs(title = title,
x = " Surprising Condition",
y = glue("Ambiguity index {response} (Baillon)"))
rm(tmp)
plot_top
}
}top <- plot_bars(response = "b", surprise_sub = NA)
left <- plot_bars(response = "b", surprise_sub = FALSE)
right <- plot_bars(response = "b", surprise_sub = TRUE)
(top / (left | right) & theme(legend.position = "bottom")) + plot_layout(guides = "collect")3.4 Figure 2
Next, we use the wrapper function again but visualize another outcome using the response == a argument.
top <- plot_bars(response = "a", limits = ylim(-2.01, 4.01), surprise_sub = NA)
left <- plot_bars(response = "a", limits = ylim(-2.01, 4.01), surprise_sub = FALSE)
right <- plot_bars(response = "a", limits = ylim(-2.01, 4.01), surprise_sub = TRUE)
(top / (left | right) & theme(legend.position = "bottom")) + plot_layout(guides = "collect")3.5 Figure 3
Figure 3.3 also presents three panels. In contrast to Figure 3.1 and Figure 3.2, these panels visualize confidence intervals of our estimator of interest: the interaction of stage and surprise (top panel) as well as the interaction between stage and communication in the lower two panels.
The corresponding data stems from many OLS regressions and are computed in for-loops. Even though the code differs slightly, it is very repetitive between the three panels, which is why we only explain the top panel
3.5.1 Top Panel
Before calculating the data based on OLS regressions, we specify three different models: none represents only treatment variables, demographics extends the former by adding demographic variables, and all extends the former two by also adding all remaining covariates.
none <- c("surprise", "treated", "surprise*treated")
demographics <- c(none, "age_35_52","age_53_plus","female", "high_education", "high_income", "married", "parentship")
all <- c(demographics, "high_temperature","high_usage","high_general_risk","high_weather_risk","high_accuracy","high_credibility")Next, we run a nested loop that iterates through all three models and within these models through both ambiguity indices a and b. From these ols regressions, we compute clustered standard errors using the {{sandwich}} and {{lmtest}} package. The resuling data is stored in a temporary data.table that is appended to an initially empty data.table called ci_top.
ci_top <- data.table()
for(RHS in c("none", "demographics", "all")){
for(LHS in c("a", "b")){
ols <- lm(formula = reformulate(termlabels = get(RHS),
response = LHS),
data = data)
tmp <- coefci(x = ols,
parm = "surpriseTRUE:treatedTRUE",
vcov = vcovCL(x = ols,
cluster = data$participant.label,
type = "HC1"),
level = 0.95) %>%
data.table(model = RHS, outcome = LHS)
tmp[, center := (`2.5 %` + `97.5 %`)/2]
ci_top <- rbind(ci_top, tmp)
}
}The result looks as follows. The data.table provides a column for the 2.5% and 97.5% confidence interval, the center of the two, as well as the ols specification as presented in model and outcome.
ci_top %>% head(n = 7) %>% kable()| 2.5 % | 97.5 % | model | outcome | center |
|---|---|---|---|---|
| -0.0461365 | 0.0608289 | none | a | 0.0073462 |
| -0.0085981 | 0.0374200 | none | b | 0.0144110 |
| -0.0370710 | 0.0733050 | demographics | a | 0.0181170 |
| -0.0071204 | 0.0418846 | demographics | b | 0.0173821 |
| -0.0371322 | 0.0733663 | all | a | 0.0181170 |
| -0.0071476 | 0.0419118 | all | b | 0.0173821 |
Finally, we plot this data set (ci_top) using the {{ggplot2}} package. The resulting object is stored as top and will be assembled together with the other two panels.
top <- ggplot(data = ci_top,
mapping = aes(y = outcome)) +
geom_pointrange(mapping = aes(x = center,
y = outcome,
xmin = `2.5 %`,
xmax = `97.5 %`,
shape = factor(model)),
data = ci_top,
position = position_dodge(width = 0.4),
fatten=5,
alpha=.8) +
geom_vline(xintercept = 0,
color = "red",
alpha = 0.66,
show.legend = FALSE) +
theme_bw() +
labs(#title = "(a) Effect of contradiction (relative to confirmation)",
y = "Ambiguity Index",
x = "Estimate",
shape="Control variables")3.5.2 Left Panel
none <- c("communication", "treated", "communication*treated")
demographics <- c(none, "age_35_52","age_53_plus","female", "high_education", "high_income", "married", "parentship")
all <- c(demographics, "high_temperature","high_usage","high_general_risk","high_weather_risk","high_accuracy","high_credibility")ci_left <- data.table()
for(RHS in c("none", "demographics", "all")){
for(LHS in c("a", "b")){
ols <- lm(formula = reformulate(termlabels = get(RHS),
response = LHS),
data = data,
subset = (surprise == FALSE))
for(communication in c("interval", "both")){
tmp <- coefci(x = ols,
parm = paste0("communication", communication, ":treatedTRUE"),
vcov = vcovCL(x = ols,
cluster = data[surprise == FALSE,
participant.label],
type = "HC1"),
level = 0.95) %>%
data.table(model = RHS, outcome = paste0(LHS, " (", communication, ")"))
tmp[, center := (`2.5 %` + `97.5 %`)/2]
ci_left <- rbind(ci_left, tmp)
}
}
}ci_left %>% head(n = 7) %>% kable()| 2.5 % | 97.5 % | model | outcome | center |
|---|---|---|---|---|
| -0.1231424 | 0.0699195 | none | a (interval) | -0.0266115 |
| -0.1603397 | 0.0317751 | none | a (both) | -0.0642823 |
| -0.0085202 | 0.0705807 | none | b (interval) | 0.0310303 |
| -0.0069006 | 0.0744238 | none | b (both) | 0.0337616 |
| -0.1032781 | 0.0924975 | demographics | a (interval) | -0.0053903 |
| -0.1519774 | 0.0393496 | demographics | a (both) | -0.0563139 |
| -0.0106052 | 0.0750051 | demographics | b (interval) | 0.0321999 |
left <- ggplot(data = ci_left,
mapping = aes(y = outcome)) +
geom_pointrange(mapping = aes(x = center,
y = outcome,
xmin = `2.5 %`,
xmax = `97.5 %`,
shape = factor(model)),
data = ci_left,
position = position_dodge(width = 0.4),
fatten=5,
alpha=.8) +
geom_vline(xintercept = 0,
color = "red",
alpha = 0.66,
show.legend = FALSE) +
theme_bw() +
labs(#title = "(b) Effect of information treatments (both, interval) relative to best guess, confirmation treatments",
y = "Ambiguity Index",
x = "Estimate",
shape="Control variables")3.5.3 Right Panel
ci_right <- data.table()
for(RHS in c("none", "demographics", "all")){
for(LHS in c("a", "b")){
ols <- lm(formula = reformulate(termlabels = get(RHS),
response = LHS),
data = data,
subset = (surprise == TRUE))
for(communication in c("interval", "both")){
tmp <- coefci(x = ols,
parm = paste0("communication", communication, ":treatedTRUE"),
vcov = vcovCL(x = ols,
cluster = data[surprise == TRUE,
participant.label],
type = "HC1"),
level = 0.95) %>%
data.table(model = RHS, outcome = paste0(LHS, " (", communication, ")"))
tmp[, center := (`2.5 %` + `97.5 %`)/2]
ci_right <- rbind(ci_right, tmp)
}
}
}ci_right %>% head(n = 7) %>% kable()| 2.5 % | 97.5 % | model | outcome | center |
|---|---|---|---|---|
| -0.0664967 | 0.1147544 | none | a (interval) | 0.0241288 |
| -0.0495534 | 0.1325661 | none | a (both) | 0.0415063 |
| -0.0339037 | 0.0457592 | none | b (interval) | 0.0059278 |
| -0.0619626 | 0.0165198 | none | b (both) | -0.0227214 |
| -0.0798350 | 0.1115715 | demographics | a (interval) | 0.0158682 |
| -0.0675663 | 0.1267932 | demographics | a (both) | 0.0296134 |
| -0.0490060 | 0.0352795 | demographics | b (interval) | -0.0068632 |
right <- ggplot(data = ci_right,
mapping = aes(y = outcome)) +
geom_pointrange(mapping = aes(x = center,
y = outcome,
xmin = `2.5 %`,
xmax = `97.5 %`,
shape = factor(model)),
data = ci_right,
position = position_dodge(width = 0.4),
fatten=5,
alpha=.8) +
geom_vline(xintercept = 0,
color = "red",
alpha = 0.66,
show.legend = FALSE) +
theme_bw() +
labs(#title = "(c) Effect of information treatments (both, interval) relative to best guess, contradiction treatments",
y = "Ambiguity Index",
x = "Estimate",
shape="Control variables") +
scale_y_discrete(position = "right")3.5.4 Assemble Top, Left and Right Panels
As before, we use the {{patchwork}} package to assmble the plot objects.
(top / (left | right) & theme(legend.position = "bottom")) + plot_layout(guides = "collect")3.6 Figure 4
The process of producing Figure 3.4 is very similar to the process of Figure 3.3. The only difference is that we do not loop through the outcomes a and b but the events’ matching probabilities (E1, E2, ..., E23).
To loop through these variables, we use a regex (E\\d) to create a vector called events.
events <- names(data) %>% str_subset(pattern = "E\\d")Because the process is, from now on, so similar to the process of Figure 3.3, we do not comment it any further here.
3.6.1 Top Panel
none <- c("surprise", "treated", "surprise*treated")
demographics <- c(none, "age_35_52","age_53_plus","female", "high_education", "high_income", "married", "parentship")
all <- c(demographics, "high_temperature","high_usage","high_general_risk","high_weather_risk","high_accuracy","high_credibility")ci_top <- data.table()
for(RHS in c("none", "demographics", "all")){
for(LHS in events){
ols <- lm(formula = reformulate(termlabels = get(RHS),
response = LHS),
data = data)
tmp <- coefci(x = ols,
parm = "surpriseTRUE:treatedTRUE",
vcov = vcovCL(x = ols,
cluster = data$participant.label,
type = "HC1"),
level = 0.95) %>% data.table(model = RHS, outcome = LHS)
tmp[, center := (`2.5 %` + `97.5 %`)/2]
ci_top <- rbind(ci_top, tmp)
}
}ci_top %>% head(n = 7) %>% kable()| 2.5 % | 97.5 % | model | outcome | center |
|---|---|---|---|---|
| 4.983983 | 10.142448 | none | E1 | 7.563215 |
| 1.600247 | 6.299091 | none | E12 | 3.949669 |
| 1.327104 | 6.115620 | none | E13 | 3.721362 |
| -7.842483 | -3.429947 | none | E2 | -5.636215 |
| -12.800137 | -7.599836 | none | E23 | -10.199987 |
| -6.107878 | -1.334793 | none | E3 | -3.721335 |
| 4.421240 | 9.844463 | demographics | E1 | 7.132852 |
top <- ggplot(data = ci_top,
mapping = aes(y = outcome)) +
geom_pointrange(mapping = aes(x = center,
y = outcome,
xmin = `2.5 %`,
xmax = `97.5 %`,
shape = factor(model)),
data = ci_top,
position = position_dodge(width = 0.4),
fatten=5,
alpha=.8) +
geom_vline(xintercept = 0,
color = "red",
alpha = 0.66,
show.legend = FALSE) +
theme_bw() +
labs(y = "Ambiguity Index",
x = "Estimate",
shape="Control variables")3.6.2 Left Panel
none <- c("communication", "treated", "communication*treated")
demographics <- c(none, "age_35_52","age_53_plus","female", "high_education", "high_income", "married", "parentship")
all <- c(demographics, "high_temperature","high_usage","high_general_risk","high_weather_risk","high_accuracy","high_credibility")ci_left <- data.table()
for(RHS in c("none", "demographics", "all")){
for(LHS in events){
ols <- lm(formula = reformulate(termlabels = get(RHS),
response = LHS),
data = data,
subset = (surprise == FALSE))
for(communication in c("interval", "both")){
tmp <- coefci(x = ols,
parm = paste0("communication", communication, ":treatedTRUE"),
vcov = vcovCL(x = ols,
cluster = data[surprise == FALSE,
participant.label],
type = "HC1"),
level = 0.95) %>%
data.table(model = RHS, outcome = paste0(LHS, " (", communication, ")"))
tmp[, center := (`2.5 %` + `97.5 %`)/2]
ci_left <- rbind(ci_left, tmp)
}
}
}ci_left %>% head(n = 7) %>% kable()| 2.5 % | 97.5 % | model | outcome | center |
|---|---|---|---|---|
| -6.746653 | 1.5693594 | none | E1 (interval) | -2.5886468 |
| -7.472010 | 0.5541086 | none | E1 (both) | -3.4589505 |
| -7.466795 | 0.4839339 | none | E12 (interval) | -3.4914306 |
| -4.297893 | 3.4859217 | none | E12 (both) | -0.4059856 |
| -2.758720 | 5.5611888 | none | E13 (interval) | 1.4012346 |
| -6.134510 | 2.0764806 | none | E13 (both) | -2.0290148 |
| -4.899051 | 2.4400333 | none | E2 (interval) | -1.2295086 |
left <- ggplot(data = ci_left,
mapping = aes(y = outcome)) +
geom_pointrange(mapping = aes(x = center,
y = outcome,
xmin = `2.5 %`,
xmax = `97.5 %`,
shape = factor(model)),
data = ci_left,
position = position_dodge(width = 0.4),
fatten=5,
alpha=.8) +
geom_vline(xintercept = 0,
color = "red",
alpha = 0.66,
show.legend = FALSE) +
theme_bw() +
labs(y = "Ambiguity Index",
x = "Estimate",
shape="Control variables")3.6.3 Right Panel
ci_right <- data.table()
for(RHS in c("none", "demographics", "all")){
for(LHS in events){
ols <- lm(formula = reformulate(termlabels = get(RHS),
response = LHS),
data = data,
subset = (surprise == TRUE))
for(communication in c("interval", "both")){
tmp <- coefci(x = ols,
parm = paste0("communication", communication, ":treatedTRUE"),
vcov = vcovCL(x = ols,
cluster = data[surprise == TRUE,
participant.label],
type = "HC1"),
level = 0.95) %>%
data.table(model = RHS, outcome = paste0(LHS, " (", communication, ")"))
tmp[, center := (`2.5 %` + `97.5 %`)/2]
ci_right <- rbind(ci_right, tmp)
}
}
}ci_right %>% head(n = 7) %>% kable()| 2.5 % | 97.5 % | model | outcome | center |
|---|---|---|---|---|
| -9.6135941 | -0.0515961 | none | E1 (interval) | -4.8325951 |
| -2.8115517 | 7.0630438 | none | E1 (both) | 2.1257460 |
| -4.0358433 | 4.3361386 | none | E12 (interval) | 0.1501477 |
| -0.4741903 | 8.1055554 | none | E12 (both) | 3.8156825 |
| -8.3619000 | -0.3107652 | none | E13 (interval) | -4.3363326 |
| -6.0444150 | 2.5918118 | none | E13 (both) | -1.7263016 |
| -1.4998061 | 6.0782483 | none | E2 (interval) | 2.2892211 |
right <- ggplot(data = ci_right,
mapping = aes(y = outcome)) +
geom_pointrange(mapping = aes(x = center,
y = outcome,
xmin = `2.5 %`,
xmax = `97.5 %`,
shape = factor(model)),
data = ci_right,
position = position_dodge(width = 0.4),
fatten=5,
alpha=.8) +
geom_vline(xintercept = 0,
color = "red",
alpha = 0.66,
show.legend = FALSE) +
theme_bw() +
labs(y = "Ambiguity Index",
x = "Estimate",
shape="Control variables") +
scale_y_discrete(position = "right")3.6.4 Assemble Top, Left and Right Panels
(top / (left | right) & theme(legend.position = "bottom")) + plot_layout(guides = "collect")